
* This do-file creates a dataset that only includes the CRSP-Compustat variables that will be used in the analyses.


* NOTE: Stata must be launched from (or already cd'ed into) the top-level
* "replication_package" folder before running this do-file. The cd below
* changes to the current directory, i.e. it does not move anywhere; every
* path that follows is resolved relative to that starting directory.
cd "`c(pwd)'"

* Define global paths relative to the main directory.
* (tables_dir is not used in this do-file; it is defined alongside the others.)
global main_dir "`c(pwd)'"
global data_dir "$main_dir/data_analysis"
global rawdata_dir "$main_dir/data_raw"
global tables_dir "$main_dir/tables"

* Load the raw CRSP-Compustat extract from the raw-data folder.
cd "$rawdata_dir/"
use crsp_compustat_1990_2020, clear

* Lower-case names for the link variables; 8- and 6-character CUSIP prefixes.
rename LINKDT linkdt
rename LPERMNO permno
gen cusip8 = substr(cusip, 1, 8)
gen cusip6 = substr(cusip, 1, 6)

* Keep only the identifiers and the variables used in the analyses.
keep gvkey permno linkdt year ///
     at capx csho dlc dltt cogs oibdp ppent pstkl sale xrd prcc_f ///
     sic cusip8 cusip6 state

* Keep one observation per gvkey-year: the first one when rows are ordered by
* at and then sale (ascending; Stata orders missing values after nonmissing).
* The stable option matters: without it Stata randomizes the order of rows
* that tie on all four sort keys, so which duplicate survives (and therefore
* the values of the other variables in the saved file) could change from one
* run to the next. With stable, ties keep their order from the raw file.
sort gvkey year at sale, stable
by gvkey year: keep if _n == 1

* Save the cleaned file in the analysis-data folder. The working directory is
* left at $data_dir when this do-file finishes.
cd "$data_dir/"
save crsp_compustat_1990_2020_clean, replace